Overview
This notebook explores the data set prepared by combining the “schools master lists” published on the Department of Basic Education website for schools in South Africa.
# LIBRARIES ----------------------------------------------------------------------------------------
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(here)
here() starts at /home/megan/megan/sa_schools
library(ggplot2)
library(plotly)
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
# READ DATA ----------------------------------------------------------------------------------------
# Load the serialised schools table; here::here() resolves the path relative
# to the project root rather than the current working directory.
sa_schools <- readRDS(
  file = here::here("data/03_sa_schools.RDS")
)
Learner and teacher numbers
# VISUALISE ----------------------------------------------------------------------------------------
# Histogram of the Learners column, one observation per row of sa_schools.
# Binning is left at the geom_histogram() default, and rows with a
# non-finite Learners value are dropped by stat_bin() with a warning.
p <- ggplot(data = sa_schools, mapping = aes(x = Learners)) +
  geom_histogram() +
  labs(
    title = "Histogram of no. of learners",
    x = "No. of learners",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Removed 309 rows containing non-finite values (stat_bin).
# Histogram of the Educators column, one observation per row of sa_schools.
# Binning is left at the geom_histogram() default, and rows with a
# non-finite Educators value are dropped by stat_bin() with a warning.
p <- ggplot(data = sa_schools, mapping = aes(x = Educators)) +
  geom_histogram() +
  labs(
    title = "Histogram of no. of educators",
    x = "No. of educators",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Removed 683 rows containing non-finite values (stat_bin).
Country and provincial characteristics
# Treat Quintile as a categorical variable so it can drive discrete fill
# scales and grouped counts in the plots that follow.
# The column is referenced through mutate()'s data mask (not via
# `sa_schools$Quintile`), so the expression always operates on the rows
# flowing through the pipe, even if the input is later grouped or filtered.
sa_schools <- sa_schools %>%
  mutate(Quintile = factor(Quintile))
Quintiles
# Bar chart of the number of schools in each Quintile category.
# geom_bar() counts rows per category; missing quintiles are filled with
# the scale's na.value ("grey").
p <- ggplot(data = sa_schools, mapping = aes(x = Quintile, fill = Quintile)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1", direction = 1, na.value = "grey") +
  labs(
    title = 'Distribution of schools in South Africa across "quintiles"',
    x = "Quintile",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
# Number of schools for every Province x Quintile combination.
sa_schools_quint <- sa_schools %>%
  group_by(Province, Quintile) %>%
  summarise(count = n()) %>%
  # summarise() only peels off the last grouping variable, so the result
  # would otherwise stay grouped by Province and the mutate() below would
  # run once per province. Drop the grouping explicitly.
  ungroup() %>%
  # filter(Quintile != "NA") %>%
  # Re-level from 5 down to 1 to control the stacking/legend order.
  # NOTE(review): the "NA" level only matches a literal "NA" label; truly
  # missing quintiles remain NA and are coloured through the fill scale's
  # na.value. Confirm whether the data ever contains the string "NA".
  mutate(
    Quintile = factor(
      Quintile,
      levels = c("5", "4", "3", "2", "1", "NA")
    )
  )
Factor `Quintile` contains implicit NA, consider using `forcats::fct_explicit_na`
# Stacked bar chart: one bar per Province, segments sized by the
# pre-computed `count` and coloured by Quintile. stat = "identity" makes
# geom_bar() use `count` as the bar height instead of counting rows.
p <- ggplot(data = sa_schools_quint) +
  geom_bar(
    mapping = aes(x = Province, y = count, fill = Quintile),
    stat = "identity"
  ) +
  labs(
    title = 'Distribution of schools in each province in South Africa across "quintiles"',
    x = "",
    y = "School count"
  ) +
  scale_fill_brewer(palette = "Set1", direction = -1, na.value = "grey") +
  # Slant the province names so they do not overlap.
  theme(axis.text.x = element_text(angle = 40, hjust = 1))

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
Rural vs Urban
# Bar chart of the number of schools in each Urban_Rural category.
# geom_bar() counts rows per category; missing values are filled with the
# scale's na.value ("grey").
p <- ggplot(
  data = sa_schools,
  mapping = aes(x = Urban_Rural, fill = Urban_Rural)
) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1", direction = 1, na.value = "grey") +
  # Slant the category labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  labs(
    title = "Rural vs urban schools in South Africa",
    x = "",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
# Number of schools for every Province x Urban_Rural combination.
sa_schools_ur <- sa_schools %>%
  group_by(Province, Urban_Rural) %>%
  summarise(count = n()) %>%
  # summarise() only peels off the last grouping variable, so the result
  # would otherwise stay grouped by Province. Return a plain, ungrouped
  # table so later verbs are not silently applied per province.
  ungroup()
Factor `Urban_Rural` contains implicit NA, consider using `forcats::fct_explicit_na`
# Stacked bar chart: one bar per Province, segments sized by the
# pre-computed `count` and coloured by Urban_Rural. stat = "identity" makes
# geom_bar() use `count` as the bar height instead of counting rows.
p <- ggplot(sa_schools_ur) +
  geom_bar(
    aes(x = Province, y = count, fill = Urban_Rural),
    stat = "identity"
  ) +
  scale_fill_brewer(palette = "Set1", direction = -1, na.value = "grey") +
  # Slant the province names so they do not overlap.
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  labs(
    title = "Urban vs Rural schools in each province in South Africa",
    x = "",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
Sector type
# Bar chart of the number of schools in each Sector category.
# geom_bar() counts rows per category; missing values are filled with the
# scale's na.value ("grey").
p <- ggplot(data = sa_schools, mapping = aes(x = Sector, fill = Sector)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1", direction = 1, na.value = "grey") +
  # Slant the category labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  labs(
    title = "Sector type of schools in South Africa",
    x = "",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
# Number of schools for every Province x Sector combination.
sa_schools_sec <- sa_schools %>%
  group_by(Province, Sector) %>%
  summarise(count = n()) %>%
  # summarise() only peels off the last grouping variable, so the result
  # would otherwise stay grouped by Province. Return a plain, ungrouped
  # table so later verbs are not silently applied per province.
  ungroup()
# Stacked bar chart: one bar per Province, segments sized by the
# pre-computed `count` and coloured by Sector. stat = "identity" makes
# geom_bar() use `count` as the bar height instead of counting rows.
p <- ggplot(sa_schools_sec) +
  geom_bar(
    aes(x = Province, y = count, fill = Sector),
    stat = "identity"
  ) +
  scale_fill_brewer(palette = "Set1", direction = -1, na.value = "grey") +
  # Slant the province names so they do not overlap.
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  labs(
    title = "Sector type of schools in each province in South Africa",
    x = "",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
Phase
# Bar chart of the number of schools in each Phase category.
# geom_bar() counts rows per category; missing values are filled with the
# scale's na.value ("grey").
p <- ggplot(data = sa_schools, mapping = aes(x = Phase, fill = Phase)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1", direction = 1, na.value = "grey") +
  # Slant the category labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  labs(
    title = "Distribution of phase of schools in South Africa",
    x = "",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
# Number of schools for every Province x Phase combination.
sa_schools_phase <- sa_schools %>%
  group_by(Province, Phase) %>%
  summarise(count = n()) %>%
  # summarise() only peels off the last grouping variable, so the result
  # would otherwise stay grouped by Province. Return a plain, ungrouped
  # table so later verbs are not silently applied per province.
  ungroup()
# Stacked bar chart: one bar per Province, segments sized by the
# pre-computed `count` and coloured by Phase. stat = "identity" makes
# geom_bar() use `count` as the bar height instead of counting rows.
p <- ggplot(sa_schools_phase) +
  geom_bar(
    aes(x = Province, y = count, fill = Phase),
    stat = "identity"
  ) +
  scale_fill_brewer(palette = "Set1", direction = -1, na.value = "grey") +
  # Slant the province names so they do not overlap.
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  labs(
    title = "Split between phase of schools in each province in South Africa",
    x = "",
    y = "School count"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
LS0tCnRpdGxlOiAiU0EgU2Nob29scyBkYXRhIGV4cGxvcmF0aW9uIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIE92ZXJ2aWV3CgpEYXRhIGV4cGxvcmF0aW9uIHVzaW5nIHRoZSBkYXRhIHByZXBhcmVkIGJ5IGNvbWJpbmluZyB0aGUgInNjaG9vbHMgbWFzdGVyIGxpc3RzIiBmcm9tIHRoZSBEZXBhcnRtZW50IG9mIEJhc2ljIEVkdWNhdGlvbiB3ZWJzaXRlIGZvciBzY2hvb2xzIGluIFNvdXRoIEFmcmljYS4KCmBgYHtyfQojIExJQlJBUklFUyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoaGVyZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHBsb3RseSkKCiMgUkVBRCBEQVRBIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0Kc2Ffc2Nob29scyA8LSByZWFkUkRTKGhlcmU6OmhlcmUoImRhdGEvMDNfc2Ffc2Nob29scy5SRFMiKSkKCmBgYAoKIyBMZWFybmVyIGFuZCB0ZWFjaGVyIG51bWJlcnMKYGBge3J9CiMgVklTVUFMSVNFIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KCnAgPC0gZ2dwbG90KHNhX3NjaG9vbHMsIGFlcyh4ID0gTGVhcm5lcnMpKSArCiAgZ2VvbV9oaXN0b2dyYW0oKSArCiAgbGFicyh0aXRsZT0nSGlzdG9ncmFtIG9mIG5vLiBvZiBsZWFybmVycycsCiAgICAgICB4PSAnTm8uIG9mIGxlYXJuZXJzJywgeSA9ICdTY2hvb2wgY291bnQnKQoKZ2dwbG90bHkocCkKCnAgPC0gZ2dwbG90KHNhX3NjaG9vbHMsIGFlcyh4ID0gRWR1Y2F0b3JzKSkgKwogIGdlb21faGlzdG9ncmFtKCkgKwogIGxhYnModGl0bGU9J0hpc3RvZ3JhbSBvZiBuby4gb2YgZWR1Y2F0b3JzJywKICAgICAgIHg9ICdOby4gb2YgZWR1Y2F0b3JzJywgeSA9ICdTY2hvb2wgY291bnQnKQoKZ2dwbG90bHkocCkKYGBgCgojIENvdW50cnkgYW5kIHByb3ZpbmNpYWwgY2hhcmFjdGVyaXN0aWNzCmBgYHtyfQpzYV9zY2hvb2xzIDwtIHNhX3NjaG9vbHMgJT4lCiAgbXV0YXRlKFF1aW50aWxlID0gZmFjdG9yKHNhX3NjaG9vbHMkUXVpbnRpbGUpKQpgYGAKCiMjIyBRdWludGlsZXMKYGBge3J9CnAgPC0gZ2dwbG90KHNhX3NjaG9vbHMsIGFlcyh4ID0gUXVpbnRpbGUsIGZpbGwgPSBRdWludGlsZSkpICsKICBnZW9tX2JhcigpICsKICBsYWJzKHRpdGxlPSdEaXN0cmlidXRpb24gb2Ygc2Nob29scyBpbiBTb3V0aCBBZnJpY2EgYWNyb3NzICJxdWludGlsZXMiJywKICAgICAgIHg9ICdRdWludGlsZScsIHkgPSAnU2Nob29sIGNvdW50JykgKwogICBzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlPSJTZXQxIiwgZGlyZWN0aW9uID0gMSwgbmEudmFsdWUgPSAiZ3JleSIpCgpnZ3Bsb3RseShw
KQpgYGAKCgpgYGB7cn0Kc2Ffc2Nob29sc19xdWludCA8LSBzYV9zY2hvb2xzICU+JQogIGdyb3VwX2J5KFByb3ZpbmNlLCBRdWludGlsZSkgJT4lCiAgc3VtbWFyaXNlKGNvdW50ID0gbigpKSAlPiUKICAjZmlsdGVyKFF1aW50aWxlICE9ICJOQSIpICU+JQogIG11dGF0ZShRdWludGlsZSA9IGZhY3RvcihRdWludGlsZSwgbGV2ZWxzID0gYygiNSIsICI0IiwgIjMiLCAiMiIsICIxIiwgIk5BIikpKQoKcCA8LSBnZ3Bsb3Qoc2Ffc2Nob29sc19xdWludCkgKwogIGdlb21fYmFyKGFlcyh4ID0gUHJvdmluY2UsIHkgPSBjb3VudCwgZmlsbCA9IFF1aW50aWxlKSwgc3RhdD0iaWRlbnRpdHkiKSArCiAgc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZT0iU2V0MSIsIGRpcmVjdGlvbiA9IC0xLCBuYS52YWx1ZSA9ICJncmV5IikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDAsIGhqdXN0ID0gMSkpICsKICBsYWJzKHRpdGxlPSdEaXN0cmlidXRpb24gb2Ygc2Nob29scyBpbiBlYWNoIHByb3ZpbmNlIGluIFNvdXRoIEFmcmljYSBhY3Jvc3MgInF1aW50aWxlcyInLAogICAgICAgeD0gJycsIHkgPSAnU2Nob29sIGNvdW50JykKCmdncGxvdGx5KHApCmBgYAoKIyMgUnVyYWwgdnMgVXJiYW4KYGBge3J9CnAgPC0gZ2dwbG90KHNhX3NjaG9vbHMsIGFlcyh4ID0gVXJiYW5fUnVyYWwsIGZpbGwgPSBVcmJhbl9SdXJhbCkpICsKICBnZW9tX2JhcigpICsKICBsYWJzKHRpdGxlPSdSdXJhbCB2cyB1cmJhbiBzY2hvb2xzIGluIFNvdXRoIEFmcmljYScsCiAgICAgICB4PSAnJywgeSA9ICdTY2hvb2wgY291bnQnKSArCiAgIHNjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9IlNldDEiLCBkaXJlY3Rpb24gPSAxLCBuYS52YWx1ZSA9ICJncmV5IikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDAsIGhqdXN0ID0gMSkpCgpnZ3Bsb3RseShwKQpgYGAKCmBgYHtyfQpzYV9zY2hvb2xzX3VyIDwtIHNhX3NjaG9vbHMgJT4lCiAgZ3JvdXBfYnkoUHJvdmluY2UsIFVyYmFuX1J1cmFsKSAlPiUKICBzdW1tYXJpc2UoY291bnQgPSBuKCkpIAoKcCA8LSBnZ3Bsb3Qoc2Ffc2Nob29sc191cikgKwogIGdlb21fYmFyKGFlcyh4ID0gUHJvdmluY2UsIHkgPSBjb3VudCwgZmlsbCA9IFVyYmFuX1J1cmFsKSwgc3RhdD0iaWRlbnRpdHkiKSArCiAgc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZT0iU2V0MSIsIGRpcmVjdGlvbiA9IC0xLCAgbmEudmFsdWUgPSAiZ3JleSIpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQwLCBoanVzdCA9IDEpKSArCiAgbGFicyh0aXRsZT0nVXJiYW4gdnMgUnVyYWwgc2Nob29scyBpbiBlYWNoIHByb3ZpbmNlIGluIFNvdXRoIEFmcmljYSInLAogICAgICAgeD0gJycsIHkgPSAnU2Nob29sIGNvdW50JykKCmdncGxvdGx5KHApCmBgYAoKCiMjIFNlY3RvciB0eXBlCmBgYHtyfQpwIDwtIGdncGxvdChzYV9zY2hvb2xzLCBhZXMoeCA9IFNlY3Rvciwg
ZmlsbCA9IFNlY3RvcikpICsKICBnZW9tX2JhcigpICsKICBsYWJzKHRpdGxlPSdTZWN0b3IgdHlwZSBvZiBzY2hvb2xzIGluIFNvdXRoIEFmcmljYScsCiAgICAgICB4PSAnJywgeSA9ICdTY2hvb2wgY291bnQnKSArCiAgIHNjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9IlNldDEiLCBkaXJlY3Rpb24gPSAxLCBuYS52YWx1ZSA9ICJncmV5IikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDAsIGhqdXN0ID0gMSkpCgpnZ3Bsb3RseShwKQpgYGAKCmBgYHtyfQpzYV9zY2hvb2xzX3NlYyA8LSBzYV9zY2hvb2xzICU+JQogIGdyb3VwX2J5KFByb3ZpbmNlLCBTZWN0b3IpICU+JQogIHN1bW1hcmlzZShjb3VudCA9IG4oKSkgCgpwIDwtIGdncGxvdChzYV9zY2hvb2xzX3NlYykgKwogIGdlb21fYmFyKGFlcyh4ID0gUHJvdmluY2UsIHkgPSBjb3VudCwgZmlsbCA9IFNlY3RvciksIHN0YXQ9ImlkZW50aXR5IikgKwogIHNjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9IlNldDEiLCBkaXJlY3Rpb24gPSAtMSwgIG5hLnZhbHVlID0gImdyZXkiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0MCwgaGp1c3QgPSAxKSkgKwogIGxhYnModGl0bGU9J1NlY3RvciB0eXBlIHNjaG9vbHMgaW4gZWFjaCBwcm92aW5jZSBpbiBTb3V0aCBBZnJpY2EiJywKICAgICAgIHg9ICcnLCB5ID0gJ1NjaG9vbCBjb3VudCcpCgpnZ3Bsb3RseShwKQpgYGAKCgojIyBQaGFzZQpgYGB7cn0KcCA8LSBnZ3Bsb3Qoc2Ffc2Nob29scywgYWVzKHggPSBQaGFzZSwgZmlsbCA9IFBoYXNlKSkgKwogIGdlb21fYmFyKCkgKwogIGxhYnModGl0bGU9J0Rpc3RyaWJ1dGlvbiBvZiBwaGFzZSBvZiBzY2hvb2xzIGluIFNvdXRoIEFmcmljYScsCiAgICAgICB4PSAnJywgeSA9ICdTY2hvb2wgY291bnQnKSArCiAgIHNjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9IlNldDEiLCBkaXJlY3Rpb24gPSAxLCBuYS52YWx1ZSA9ICJncmV5IikgICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQwLCBoanVzdCA9IDEpKQoKZ2dwbG90bHkocCkKYGBgCgpgYGB7cn0Kc2Ffc2Nob29sc19waGFzZSA8LSBzYV9zY2hvb2xzICU+JQogIGdyb3VwX2J5KFByb3ZpbmNlLCBQaGFzZSkgJT4lCiAgc3VtbWFyaXNlKGNvdW50ID0gbigpKSAKCnAgPC0gZ2dwbG90KHNhX3NjaG9vbHNfcGhhc2UpICsKICBnZW9tX2JhcihhZXMoeCA9IFByb3ZpbmNlLCB5ID0gY291bnQsIGZpbGwgPSBQaGFzZSksIHN0YXQ9ImlkZW50aXR5IikgKwogIHNjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9IlNldDEiLCBkaXJlY3Rpb24gPSAtMSwgIG5hLnZhbHVlID0gImdyZXkiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0MCwgaGp1c3QgPSAxKSkgKwogIGxhYnModGl0bGU9J1NwbGl0IGJldHdlZW4gcGhhc2Ugb2Ygc2Nob29scyBpbiBlYWNoIHByb3ZpbmNlIGluIFNvdXRoIEFmcmljYSInLAog
ICAgICAgeD0gJycsIHkgPSAnU2Nob29sIGNvdW50JykKCmdncGxvdGx5KHApCmBgYAo=